Differential expression using the bulk RNAseq data from the May release.
library(ggplot2)
library(ggrepel)
library(ggpubr)
library(stringr)
library(MASS)
library(RColorBrewer)
library(DESeq2)
library(viridis)
library(ggpointdensity)
library(dplyr)
library(data.table)
library(ComplexHeatmap)
library(UpSetR)
library(readxl)
theme_set(theme_classic())
#path = "~/research/data/MorPhiC/May-2024/JAX_RNAseq2_ExtraEmbryonic/"
path = "../../MorPhiC/data/May-2024/JAX_RNAseq2_ExtraEmbryonic/"
dat_path = paste0(path, "processed/release110-gencode44/Tables")meta = fread("data/JAX_RNAseq2_ExtraEmbryonic_meta_data.tsv",
data.table = FALSE)
dim(meta)## [1] 82 32
meta[1:2,]## biomaterial_id lib_biomaterial_id differentiated_biomaterial_id
## 1 MOK20011-WT1 GT23-13719 PrS-MOK20011-WT1
## 2 MOK20011-WT2 GT23-13720 PrS-MOK20011-WT2
## differentiated_biomaterial_description differentiation_protocol
## 1 KOLF2.2 derived primitive syncytium JAXPD001
## 2 KOLF2.2 derived primitive syncytium JAXPD001
## differentiated_timepoint_value differentiated_timepoint_unit
## 1 6 days
## 2 6 days
## differentiated_terminally_differentiated
## 1 Yes
## 2 Yes
## model_organ ko_strategy ko_gene
## 1 extra-embryonic primitive syncytial cells WT WT
## 2 extra-embryonic primitive syncytial cells WT WT
## library_preparation_protocol dissociation_protocol fragment_size input_amount
## 1 JAXPL001 N.A 417 300
## 2 JAXPL001 N.A 402 300
## input_unit final_yield final_yield_unit lib_concentration
## 1 ngs 190.0 ngs 34.51784
## 2 ngs 91.2 ngs 17.18679
## lib_concentration_unit PCR_cycle PCR_cycle_sample_index
## 1 nM 10 NA
## 2 nM 10 NA
## file_id sequencing_protocol
## 1 RUNX1_WT1_GT23-13719_CCAAGTCT-TCATCCTT_S170_L003 JAXPS001
## 2 RUNX1_WT2_GT23-13720_TTGGACTC-CTGCTTCC_S157_L003 JAXPS001
## run_id biomaterial_description derived_cell_line_accession
## 1 20231124_23-robson-013 KOLF2.2 KOLF2.2J
## 2 20231124_23-robson-013 KOLF2.2 KOLF2.2J
## clone_id protocol_id zygosity type model_system
## 1 WT1 N.A N.A iPSC PrS
## 2 WT2 N.A N.A iPSC PrS
names(meta)## [1] "biomaterial_id"
## [2] "lib_biomaterial_id"
## [3] "differentiated_biomaterial_id"
## [4] "differentiated_biomaterial_description"
## [5] "differentiation_protocol"
## [6] "differentiated_timepoint_value"
## [7] "differentiated_timepoint_unit"
## [8] "differentiated_terminally_differentiated"
## [9] "model_organ"
## [10] "ko_strategy"
## [11] "ko_gene"
## [12] "library_preparation_protocol"
## [13] "dissociation_protocol"
## [14] "fragment_size"
## [15] "input_amount"
## [16] "input_unit"
## [17] "final_yield"
## [18] "final_yield_unit"
## [19] "lib_concentration"
## [20] "lib_concentration_unit"
## [21] "PCR_cycle"
## [22] "PCR_cycle_sample_index"
## [23] "file_id"
## [24] "sequencing_protocol"
## [25] "run_id"
## [26] "biomaterial_description"
## [27] "derived_cell_line_accession"
## [28] "clone_id"
## [29] "protocol_id"
## [30] "zygosity"
## [31] "type"
## [32] "model_system"
table(table(meta$biomaterial_id))##
## 1
## 82
table(table(meta$lib_biomaterial_id))##
## 1
## 82
table(table(meta$differentiated_biomaterial_id))##
## 1
## 82
cts = fread(file.path(dat_path, "genesCounts.csv"), data.table = FALSE)
dim(cts)## [1] 38592 83
cts[1:2, c(1:2, (ncol(cts)-1):ncol(cts))]## Name MEIS2_CE_A06_GT23-12174_TGCGAGAC-CAACAATG_S1_L001
## 1 ENSG00000268674 0
## 2 ENSG00000271254 793
## MEF2C_PTC_B03_GT23-13717_TCTCTACT-GAACCGCG_S177_L003
## 1 0
## 2 937
## RUNX1_KO_G07_GT23-13724_CGGCGTGA-ACAGGCGC_S175_L003
## 1 0
## 2 1079
stopifnot(setequal(names(cts)[-1], meta$file_id))
meta = meta[match(names(cts)[-1], meta$file_id),]
table(names(cts)[-1] == meta$file_id)##
## TRUE
## 82
cts_dat = data.matrix(cts[,-1])
rownames(cts_dat) = cts$Namegene_anno = fread("data/gencode_v44_primary_assembly_info.tsv")
dim(gene_anno)## [1] 62754 8
gene_anno[1:2,]## geneId chr strand start end ensembl_gene_id
## <char> <char> <char> <int> <int> <char>
## 1: ENSG00000000003.16 chrX - 100627108 100639991 ENSG00000000003
## 2: ENSG00000000005.6 chrX + 100584936 100599885 ENSG00000000005
## hgnc_symbol description
## <char> <char>
## 1: TSPAN6 tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858]
## 2: TNMD tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757]
table(rownames(cts_dat) %in% gene_anno$ensembl_gene_id)##
## TRUE
## 38592
# all ensembl gene IDs are contained in the annotation
setdiff(rownames(cts_dat), gene_anno$ensembl_gene_id)## character(0)
model_s = meta$model_system
get_q75 <- function(x){quantile(x, probs = 0.75)}
gene_info = data.frame(Name = cts$Name,
t(apply(cts_dat, 1, tapply, model_s, min)),
t(apply(cts_dat, 1, tapply, model_s, median)),
t(apply(cts_dat, 1, tapply, model_s, get_q75)),
t(apply(cts_dat, 1, tapply, model_s, max)))
names(gene_info)[2:9] = paste0(rep(c("ExM_", "PrS_"), times=4),
rep(c("min", "median", "q75", "max"), each=2))
dim(gene_info)## [1] 38592 9
gene_info[1:2,]## Name ExM_min PrS_min ExM_median PrS_median ExM_q75
## ENSG00000268674 ENSG00000268674 0 0 0 0.0 0
## ENSG00000271254 ENSG00000271254 594 512 797 958.5 858
## PrS_q75 ExM_max PrS_max
## ENSG00000268674 0.00 0 1
## ENSG00000271254 1051.75 1079 1695
summary(gene_info[,-1])## ExM_min PrS_min ExM_median PrS_median
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 0.0 Median : 0.0 Median : 2.0 Median : 1.5
## Mean : 323.5 Mean : 335.2 Mean : 593.2 Mean : 647.1
## 3rd Qu.: 100.0 3rd Qu.: 71.0 3rd Qu.: 217.0 3rd Qu.: 177.5
## Max. :240570.0 Max. :209625.0 Max. :883830.0 Max. :382389.5
## ExM_q75 PrS_q75 ExM_max PrS_max
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 1.0 1st Qu.: 1
## Median : 3.8 Median : 3.0 Median : 8.0 Median : 8
## Mean : 731.5 Mean : 772.1 Mean : 1063.6 Mean : 1188
## 3rd Qu.: 279.0 3rd Qu.: 217.6 3rd Qu.: 400.2 3rd Qu.: 330
## Max. :1028917.5 Max. :454140.8 Max. :1675470.0 Max. :773629
table(gene_info$ExM_q75 > 0, gene_info$PrS_q75 > 0)##
## FALSE TRUE
## FALSE 12951 1098
## TRUE 1885 22658
w2kp = which(gene_info$ExM_q75 > 0 | gene_info$PrS_q75 > 0)
cts_dat = cts_dat[w2kp,]
dim(cts_dat)## [1] 25641 82
gene_info = gene_info[w2kp,]
meta$read_depth = colSums(cts_dat)/1e6
meta$q75 = apply(cts_dat, 2, quantile, probs = 0.75)
ggplot(meta, aes(x=read_depth, y=q75, color=ko_strategy, shape = model_system)) +
geom_point(size=2, alpha=0.7) + ggtitle("Gene counts") +
scale_shape_manual(values = c(7, 16)) +
xlab("read depth (million)") + ylab("75 percentile of gene expression") +
labs(color = "KO type", shape = "Model") +
scale_color_brewer(palette = "Set1")meta$run_id_short = str_replace(meta$run_id, "^[^-]*-", "")
ggplot(meta, aes(x=read_depth, y=q75, color=run_id_short, shape = model_system)) +
geom_point(size=2, alpha=0.7) + ggtitle("Gene counts") +
scale_shape_manual(values = c(7, 16)) +
xlab("read depth (million)") + ylab("75 percentile of gene expression") +
labs(color = "Run ID", shape = "Model") +
scale_color_brewer(palette = "Set1")sample2kp = which(meta$ko_gene == "WT")
cts_dat_m = cts_dat[,sample2kp]
meta_m = meta[sample2kp,]
summary(meta_m$q75)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 428.0 510.0 554.0 574.9 649.0 727.0
dim(cts_dat_m)## [1] 25641 21
stopifnot(all(meta_m$file_id == colnames(cts_dat_m)))
q75 = apply(cts_dat_m, 1, quantile, probs=0.75)
cts_dat_n = t(cts_dat_m[q75 > 0,])
cts_dat_n = log(median(meta_m$q75)*cts_dat_n/meta_m$q75 + 1)
dim(cts_dat_n)## [1] 21 24263
sample_pca = prcomp(cts_dat_n, center = TRUE, scale. = TRUE)
names(sample_pca)## [1] "sdev" "rotation" "center" "scale" "x"
pc_scores = sample_pca$x
eigen_vals = sample_pca$sdev^2
eigen_vals[1:5]/sum(eigen_vals)## [1] 0.47564883 0.12518353 0.07086713 0.05336636 0.02717944
pvs = round(eigen_vals[1:5]/sum(eigen_vals)*100,1)
pvs## [1] 47.6 12.5 7.1 5.3 2.7
PC_df = data.frame(pc_scores)
PC_df = cbind(PC_df, meta_m)
gPC = ggplot(PC_df, aes(x = PC1, y = PC2, shape=model_system,
color=run_id_short)) +
geom_point(size=2.5, alpha=0.7) + scale_color_brewer(palette = "Dark2") +
scale_shape_manual(values = c(7, 15, 16, 17)) +
labs(color="KO gene", shape ="KO strategy") +
xlab(sprintf("PC1 (%.1f%% variance)", pvs[1])) +
ylab(sprintf("PC2 (%.1f%% variance)", pvs[2])) +
ggtitle("Wild type samples") +
guides(
color = guide_legend(title = NULL, order = 1),
shape = guide_legend(title = NULL, order = 2)
) +
theme(
legend.margin = margin(0, 0, 0, 0),
legend.box.just = "left",
legend.direction = "vertical"
)
print(gPC)## Generate sample information matrix
cols2kp = c("model_system", "q75")
sample_info = meta_m[,cols2kp]
dim(sample_info)## [1] 21 2
sample_info[1:2,]## model_system q75
## 59 PrS 554
## 25 PrS 428
sample_info$log_q75 = log(sample_info$q75)
dds = DESeqDataSetFromMatrix(cts_dat_m, colData=sample_info,
~ model_system + log_q75)
dds = DESeq(dds)
## association with read-depth
res_rd = results(dds, name = "log_q75")
res_rd = as.data.frame(res_rd)
dim(res_rd)## [1] 25641 6
res_rd[1:2,]## baseMean log2FoldChange lfcSE stat pvalue
## ENSG00000271254 944.5427577 0.1298778 0.3327253 0.3903454 0.6962812
## ENSG00000276345 0.3686712 -5.3837662 9.7887890 -0.5499931 0.5823241
## padj
## ENSG00000271254 0.8840768
## ENSG00000276345 NA
table(is.na(res_rd$padj))##
## FALSE TRUE
## 14702 10939
g0 = ggplot(res_rd %>% subset(!is.na(padj)), aes(x=pvalue)) +
geom_histogram(color="darkblue", fill="lightblue",
breaks = seq(0,1,by=0.01)) +
ggtitle("Read depth")
print(g0)## association with model_system
dds_lrt = DESeq(dds, test="LRT", reduced = ~ + log_q75)
res_lrt = results(dds_lrt)
res_model_system = as.data.frame(res_lrt)
dim(res_model_system)## [1] 25641 6
res_model_system[1:2,]## baseMean log2FoldChange lfcSE stat pvalue
## ENSG00000271254 944.5427577 0.1298778 0.3327253 31.92267820 1.604331e-08
## ENSG00000276345 0.3686712 -5.3837662 9.7887890 -0.01029894 1.000000e+00
## padj
## ENSG00000271254 4.7729e-08
## ENSG00000276345 1.0000e+00
table(is.na(res_model_system$padj))##
## FALSE TRUE
## 24642 999
g0 = ggplot(res_model_system %>% subset(!is.na(padj)), aes(x=pvalue)) +
geom_histogram(color="darkblue", fill="lightblue",
breaks = seq(0,1,by=0.01)) +
ggtitle("Model system")
print(g0)Explore the PC plots first, to decide running the models on what level of DE group.
Then run the DE analysis.
table(meta$run_id, meta$model_system)##
## ExM PrS
## 20230809_23-robson-007-run2 0 12
## 20231004_23-robson-011 22 0
## 20231124_23-robson-013 12 12
## 20240307_24-robson-002 0 12
## 20240307_24-robson-005 0 12
table(meta$run_id, meta$ko_gene)##
## BHLHE40 MEF2C MEIS1 MEIS2 MXD1 NCOA3 RUNX1 WT
## 20230809_23-robson-007-run2 0 0 0 0 9 0 0 3
## 20231004_23-robson-011 0 0 9 7 0 0 0 6
## 20231124_23-robson-013 0 9 0 0 0 0 9 6
## 20240307_24-robson-002 9 0 0 0 0 0 0 3
## 20240307_24-robson-005 0 0 0 0 0 9 0 3
table(meta$run_id, meta$model_system, meta$ko_gene)## , , = BHLHE40
##
##
## ExM PrS
## 20230809_23-robson-007-run2 0 0
## 20231004_23-robson-011 0 0
## 20231124_23-robson-013 0 0
## 20240307_24-robson-002 0 9
## 20240307_24-robson-005 0 0
##
## , , = MEF2C
##
##
## ExM PrS
## 20230809_23-robson-007-run2 0 0
## 20231004_23-robson-011 0 0
## 20231124_23-robson-013 9 0
## 20240307_24-robson-002 0 0
## 20240307_24-robson-005 0 0
##
## , , = MEIS1
##
##
## ExM PrS
## 20230809_23-robson-007-run2 0 0
## 20231004_23-robson-011 9 0
## 20231124_23-robson-013 0 0
## 20240307_24-robson-002 0 0
## 20240307_24-robson-005 0 0
##
## , , = MEIS2
##
##
## ExM PrS
## 20230809_23-robson-007-run2 0 0
## 20231004_23-robson-011 7 0
## 20231124_23-robson-013 0 0
## 20240307_24-robson-002 0 0
## 20240307_24-robson-005 0 0
##
## , , = MXD1
##
##
## ExM PrS
## 20230809_23-robson-007-run2 0 9
## 20231004_23-robson-011 0 0
## 20231124_23-robson-013 0 0
## 20240307_24-robson-002 0 0
## 20240307_24-robson-005 0 0
##
## , , = NCOA3
##
##
## ExM PrS
## 20230809_23-robson-007-run2 0 0
## 20231004_23-robson-011 0 0
## 20231124_23-robson-013 0 0
## 20240307_24-robson-002 0 0
## 20240307_24-robson-005 0 9
##
## , , = RUNX1
##
##
## ExM PrS
## 20230809_23-robson-007-run2 0 0
## 20231004_23-robson-011 0 0
## 20231124_23-robson-013 0 9
## 20240307_24-robson-002 0 0
## 20240307_24-robson-005 0 0
##
## , , = WT
##
##
## ExM PrS
## 20230809_23-robson-007-run2 0 3
## 20231004_23-robson-011 6 0
## 20231124_23-robson-013 3 3
## 20240307_24-robson-002 0 3
## 20240307_24-robson-005 0 3
table(meta$ko_strategy, meta$ko_gene)##
## BHLHE40 MEF2C MEIS1 MEIS2 MXD1 NCOA3 RUNX1 WT
## CE 3 3 3 3 3 3 3 0
## KO 3 3 3 1 3 3 3 0
## PTC 3 3 3 3 3 3 3 0
## WT 0 0 0 0 0 0 0 21
unique_model_ss = unique(meta$model_system)
unique_model_ss = sort(unique_model_ss)
for(model1 in unique_model_ss){
print(model1)
sample2kp = which(meta$model_system == model1)
cts_dat_m = cts_dat[,sample2kp]
meta_m = meta[sample2kp,]
table(meta_m$ko_strategy, str_extract(colnames(cts_dat_m), "^[^_]+_[^_]+"))
stopifnot(all(meta_m$file_id == colnames(cts_dat_m)))
q75 = apply(cts_dat_m, 1, quantile, probs=0.75)
cts_dat_n = t(cts_dat_m[q75 > 0,])
cts_dat_n = log(median(meta_m$q75)*cts_dat_n/meta_m$q75 + 1)
dim(cts_dat_n)
sample_pca = prcomp(cts_dat_n, center = TRUE, scale. = TRUE)
names(sample_pca)
pc_scores = sample_pca$x
eigen_vals = sample_pca$sdev^2
eigen_vals[1:5]/sum(eigen_vals)
pvs = round(eigen_vals[1:5]/sum(eigen_vals)*100,1)
pvs
PC_df = data.frame(pc_scores)
PC_df = cbind(PC_df, meta_m)
gPC = ggplot(PC_df, aes(x = PC1, y = PC2, shape=ko_strategy, color=ko_gene)) +
geom_point(size=2.5, alpha=0.7) + scale_color_brewer(palette = "Dark2") +
scale_shape_manual(values = c(7, 15, 16, 17)) +
xlab(sprintf("PC1 (%.1f%% variance)", pvs[1])) +
ylab(sprintf("PC2 (%.1f%% variance)", pvs[2])) +
ggtitle(model1) +
guides(
color = guide_legend(title = NULL, order = 1),
shape = guide_legend(title = NULL, order = 2)
) +
theme(
legend.margin = margin(0, 0, 0, 0),
legend.box.just = "left",
legend.direction = "vertical"
)
print(gPC)
gPC = ggplot(PC_df, aes(x = PC1, y = PC2, shape=run_id_short, color=ko_gene)) +
geom_point(size=2.5, alpha=0.7) + scale_color_brewer(palette = "Dark2") +
scale_shape_manual(values = c(7, 15, 16, 17)) +
xlab(sprintf("PC1 (%.1f%% variance)", pvs[1])) +
ylab(sprintf("PC2 (%.1f%% variance)", pvs[2])) +
ggtitle(model1) +
guides(
color = guide_legend(title = NULL, order = 1),
shape = guide_legend(title = NULL, order = 2)
) +
theme(
legend.margin = margin(0, 0, 0, 0),
legend.box.just = "left",
legend.direction = "vertical"
)
print(gPC)
gPC = ggplot(PC_df, aes(x = PC1, y = PC2, shape=run_id_short, color=ko_gene,
alpha = ifelse(ko_gene == "WT", 1, 0.8))) +
geom_point(size=2.5) +
scale_color_brewer(palette = "Dark2") +
scale_shape_manual(values = c(7, 15, 16, 17)) +
xlab(sprintf("PC1 (%.1f%% variance)", pvs[1])) +
ylab(sprintf("PC2 (%.1f%% variance)", pvs[2])) +
ggtitle(model1) + guides(alpha = "none") +
guides(
color = guide_legend(title = NULL, order = 1),
shape = guide_legend(title = NULL, order = 2)
) +
theme(
legend.margin = margin(0, 0, 0, 0),
legend.box.just = "left",
legend.direction = "vertical"
)
print(gPC)
table(meta_m$run_id, meta_m$ko_gene)
}## [1] "ExM"
## [1] "PrS"
Under model system “Exm”, the batch effect caused by run id seems to be big. Run DE analysis for different run ids separately.
Under model system “PrS”, still run analysis together.
For the output files, create two versions, one with direct information, one with padj set to NA for the genes when the number of 0 per test is >=4.
In more details, for both versions of the outputs, make it one file per knockout gene per DE group (DE group can be model system, model system + condition, model system + run_id, model system + day, etc.).
The first version of the output files contains gene id, and for each knockout strategy:
baseMean, log2FoldChange, lfcSE, stat, pvalue, padj
The second version of the output files contains gene id,
chr, position, gene name
and for each knockout strategy:
log2FoldChange, pvalue, padj (set to be NA if the number of 0 per test >= 4),
In addition, save out a separate file of the sample size for each comparison. This needs to be by DE group.
df_size = NULL
output_dir = "results/2024_05_JAX_RNAseq2_ExtraEmbryonic"
raw_output_dir = file.path(output_dir, "raw")
processed_output_dir = file.path(output_dir, "processed")
if (!file.exists(raw_output_dir)){
dir.create(raw_output_dir, recursive = TRUE)
}
if (!file.exists(processed_output_dir)){
dir.create(processed_output_dir, recursive = TRUE)
}
for(model1 in unique_model_ss){
print(model1)
sample2kp = which(meta$model_system == model1)
cts_dat_m = cts_dat[,sample2kp]
meta_m = meta[sample2kp,]
print(table(meta_m$ko_strategy, str_extract(colnames(cts_dat_m), "^[^_]+_[^_]+")))
stopifnot(all(meta_m$file_id == colnames(cts_dat_m)))
print(table(meta_m$file_id == colnames(cts_dat_m), useNA="ifany"))
## Generate sample information matrix
cols2kp = c("model_system", "ko_strategy", "ko_gene", "run_id", "q75")
sample_info = meta_m[,cols2kp]
dim(sample_info)
sample_info[1:2,]
print(table(sample_info$ko_strategy, sample_info$ko_gene))
sample_info$ko_group = paste(sample_info$ko_gene,
sample_info$ko_strategy, sep="_")
print(table(sample_info$ko_group))
print(length(unique(sample_info$ko_group)))
print(table(sample_info$run_id, sample_info$ko_group))
sample_info$log_q75 = log(sample_info$q75)
# run DESeq2 for the two run ids separately for model system ExM
if (model1 == "ExM"){
sample_info$de_grp = paste0(sample_info$model_system, "_", sample_info$run_id)
}else{
sample_info$de_grp = sample_info$model_system
}
sorted_de_grps = sort(unique(sample_info$de_grp))
for (d_group in sorted_de_grps){
dg_index = which(sample_info$de_grp==d_group)
cts_dat_m_dg = cts_dat_m[, dg_index]
sample_info_dg = sample_info[dg_index, ]
stopifnot(all(sample_info_dg$de_grp==d_group))
print("-----------------------------------")
print(paste0("DE analysis group: ", d_group))
if (sum(sample_info_dg$ko_gene=="WT")<2){
print(sprintf("DE analysis group %s has only %d wild type samples",
d_group, sum(sample_info_dg$ko_gene=="WT")))
print(sprintf("do not run DESeq2 for it"))
print("-----------------------------------")
break
}else{
print(sprintf("DE analysis group %s DESeq2 results:", d_group))
print("-----------------------------------")
}
# do two steps of filtering
# first, filter based on q75 of each gene
# second, filter based on whether each gene is expressed in at least 4 cells
dg_q75 = apply(cts_dat_m_dg, 1, quantile, probs=0.75)
cts_dat_m_dg = cts_dat_m_dg[dg_q75 > 0,]
print(sprintf("After filtering by gene expression q75, the number of genes reduces from %d to %d",
length(dg_q75), nrow(cts_dat_m_dg)))
n_pos = rowSums(cts_dat_m_dg>0)
cts_dat_m_dg = cts_dat_m_dg[n_pos >= 4,]
if (length(n_pos)==nrow(cts_dat_m_dg)){
print("After filtering by nonzero expression in at least 4 samples, the number of genes does not change.")
}else{
print(sprintf("After filtering by nonzero expression in at least 4 samples, the number of genes reduces from %d to %d",
length(n_pos), nrow(cts_dat_m_dg)))
}
# update the q75 based on only genes used in the process
sample_info_dg$q75 = apply(cts_dat_m_dg, 2, quantile, probs = 0.75)
sample_info_dg$log_q75 = log(sample_info_dg$q75)
if (length(unique(sample_info_dg$run_id))==1){
dds = DESeqDataSetFromMatrix(cts_dat_m_dg, colData=sample_info_dg,
~ ko_group + log_q75)
}else{
dds = DESeqDataSetFromMatrix(cts_dat_m_dg, colData=sample_info_dg,
~ ko_group + run_id + log_q75)
}
start.time <- Sys.time()
dds = DESeq(dds)
end.time <- Sys.time()
time.taken <- end.time - start.time
print(time.taken)
## association with read-depth
res_rd = results(dds, name = "log_q75")
res_rd = as.data.frame(res_rd)
dim(res_rd)
res_rd[1:2,]
table(is.na(res_rd$padj))
g0 = ggplot(res_rd %>% subset(!is.na(padj)), aes(x=pvalue)) +
geom_histogram(color="darkblue", fill="lightblue",
breaks = seq(0,1,by=0.01)) +
ggtitle(paste0(d_group, " read depth"))
print(g0)
## Rerun the analysis without read-depth if it is not significant for
## most genes, and the number of samples is small
## i.e., proportion of non-null in the distribution is smaller than 0.01
## (this needs to be restricted to the genes with not NA adjusted pvalue)
## or if smaller than 0.1 and the number of samples is not greater than 6
pi_1_rd = max(0, mean(res_rd$pvalue[!is.na(res_rd$padj)] < 0.05) - 0.05)
pi_1_rd = max(pi_1_rd, 0, 1 - 2*mean(res_rd$pvalue[!is.na(res_rd$padj)] > 0.5))
print(sprintf("prop of non-null for rd: %.2e", pi_1_rd))
if(pi_1_rd < 0.01 || (ncol(cts_dat_m_dg) <= 6 && pi_1_rd < 0.1 )){
print("rerun DESeq2 without read depth")
include_rd = 0
if (length(unique(sample_info_dg$run_id))==1){
dds = DESeqDataSetFromMatrix(cts_dat_m_dg, colData=sample_info_dg,
~ ko_group)
}else{
dds = DESeqDataSetFromMatrix(cts_dat_m_dg, colData=sample_info_dg,
~ ko_group + run_id)
}
start.time <- Sys.time()
dds = DESeq(dds)
end.time <- Sys.time()
time.taken <- end.time - start.time
print(time.taken)
}else{
include_rd = 1
}
## association with run_id
if (length(unique(sample_info_dg$run_id))>1){
if(include_rd){
dds_lrt = DESeq(dds, test="LRT", reduced = ~ ko_group + log_q75)
}else{
dds_lrt = DESeq(dds, test="LRT", reduced = ~ ko_group)
}
res_lrt = results(dds_lrt)
res_run_id = as.data.frame(res_lrt)
dim(res_run_id)
res_run_id[1:2,]
table(is.na(res_run_id$padj))
g0 = ggplot(res_run_id %>% subset(!is.na(padj)), aes(x=pvalue)) +
geom_histogram(color="darkblue", fill="lightblue",
breaks = seq(0,1,by=0.01)) +
ggtitle(paste0(d_group, " Run ID"))
print(g0)
}
## DE analysis for each KO gene
## for knockout gene MEIS2 and knockout strategy KO
## there is only one knockout sample
## need to set the padj for all genes to NA for this setting of comparison
table(sample_info_dg$ko_gene)
table(sample_info_dg$ko_gene, sample_info_dg$ko_strategy)
genos = setdiff(unique(sample_info_dg$ko_gene), "WT")
genos
ko_grp = c("CE", "KO", "PTC")
for(geno in genos){
res_geno_df = NULL
res_geno_reduced_df = NULL
res_geno = list()
for(k_grp1 in ko_grp){
res_g1 = results(dds, contrast = c("ko_group",
paste0(geno, "_", k_grp1), "WT_WT"))
res_g1 = signif(data.frame(res_g1), 3)
# add a column to record the number of zeros per test
test_index = which(sample_info_dg$ko_group%in%c(paste0(geno, "_", k_grp1), "WT_WT"))
cts_dat_m_dg_test = cts_dat_m_dg[, test_index]
n_zero = rowSums(cts_dat_m_dg_test==0)
res_g1$n_zeros = n_zero
# record the number of samples involved in the comparison
test_ko_group_vec = sample_info_dg$ko_group[test_index]
df_size = rbind(df_size,
c(d_group,
paste0(geno, "_", k_grp1),
sum(test_ko_group_vec!="WT_WT"),
sum(test_ko_group_vec=="WT_WT")))
# prepare a processed version of output
res_g1_reduced = res_g1[, c("log2FoldChange", "pvalue", "padj", "n_zeros")]
res_g1_reduced$padj[which(res_g1_reduced$n_zeros>=4)] = NA
if (sum(test_ko_group_vec!="WT_WT")==1){
res_g1_reduced$padj = NA
}
res_g1_reduced = res_g1_reduced[, c("log2FoldChange", "pvalue", "padj")]
res_geno[[k_grp1]] = res_g1_reduced
if (sum(test_ko_group_vec!="WT_WT")>1){
g1 = ggplot(res_g1_reduced %>% subset(!is.na(padj)), aes(x=pvalue)) +
geom_histogram(color="darkblue", fill="lightblue",
breaks = seq(0,1,by=0.01)) +
ggtitle(paste0(d_group, " ", geno, "_", k_grp1))
print(g1)
}
#tag1 = sprintf("%s_%s_%s_", d_group, geno, k_grp1)
tag1 = sprintf("%s_%s_", geno, k_grp1)
colnames(res_g1) = paste0(tag1, colnames(res_g1))
colnames(res_g1_reduced) = paste0(tag1, colnames(res_g1_reduced))
if(is.null(res_geno_df)){
res_geno_df = res_g1
}else if(!is.null(res_geno_df)){
stopifnot(all(rownames(res_geno_df) == rownames(res_g1)))
res_geno_df = cbind(res_geno_df, res_g1)
}
if(is.null(res_geno_reduced_df)){
res_geno_reduced_df = res_g1_reduced
}else if(!is.null(res_geno_reduced_df)){
stopifnot(all(rownames(res_geno_reduced_df) == rownames(res_g1_reduced)))
res_geno_reduced_df = cbind(res_geno_reduced_df, res_g1_reduced)
}
}
get_index <- function(x){
which(x$padj < 0.05 & abs(x$log2FoldChange) > log2(1.5))
}
# make an exception for the case of
# d_group = "ExM_20231004_23-robson-011"
# geno = "MEIS2"
if ((d_group=="ExM_20231004_23-robson-011")&(geno=="MEIS2")){
w_ce = get_index(res_geno[["CE"]])
w_ptc = get_index(res_geno[["PTC"]])
print(paste0("DE group ", d_group, " under KO gene ", geno, ":"))
print("There is no result for knock strategy KO due to number of knockout samples being only 1.")
print(paste0("number of DE genes from knock out strategy CE: ",
as.character(length(w_ce))))
print(paste0("number of DE genes from knock out strategy PTC: ",
as.character(length(w_ptc))))
ptc_ce_overlap = length(intersect(rownames(res_geno[["PTC"]])[w_ptc],
rownames(res_geno[["CE"]])[w_ce]))
print(paste0("number of common DE genes by knock out strategies PTC and CE: ",
as.character(ptc_ce_overlap)))
m = make_comb_mat(list("CE" = rownames(res_geno[["CE"]])[w_ce],
"PTC" = rownames(res_geno[["PTC"]])[w_ptc]))
g_up = UpSet(m)
print(g_up)
df1 = data.frame(padj_CE = res_geno[["CE"]]$padj,
padj_PTC = res_geno[["PTC"]]$padj)
cr1 = cor(df1$padj_PTC, df1$padj_CE, method = "spearman", use="pair")
g4 = ggplot(data = df1, mapping = aes(x = -log10(padj_PTC),
y = -log10(padj_CE))) +
geom_pointdensity() +
ggtitle(sprintf("%s, %s\nSpearman r = %.2f", d_group, geno, cr1)) +
scale_color_viridis()
print(g4)
}else{
w_ce = get_index(res_geno[["CE"]])
w_ko = get_index(res_geno[["KO"]])
w_ptc = get_index(res_geno[["PTC"]])
print(paste0("DE group ", d_group, " under KO gene ", geno, ":"))
print(paste0("number of DE genes from knock out strategy CE: ",
as.character(length(w_ce))))
print(paste0("number of DE genes from knock out strategy KO: ",
as.character(length(w_ko))))
print(paste0("number of DE genes from knock out strategy PTC: ",
as.character(length(w_ptc))))
ce_ko_overlap = length(intersect(rownames(res_geno[["CE"]])[w_ce],
rownames(res_geno[["KO"]])[w_ko]))
ko_ptc_overlap = length(intersect(rownames(res_geno[["KO"]])[w_ko],
rownames(res_geno[["PTC"]])[w_ptc]))
ptc_ce_overlap = length(intersect(rownames(res_geno[["PTC"]])[w_ptc],
rownames(res_geno[["CE"]])[w_ce]))
print(paste0("number of common DE genes by knock out strategies CE and KO: ",
as.character(ce_ko_overlap)))
print(paste0("number of common DE genes by knock out strategies KO and PTC: ",
as.character(ko_ptc_overlap)))
print(paste0("number of common DE genes by knock out strategies PTC and CE: ",
as.character(ptc_ce_overlap)))
m = make_comb_mat(list("CE" = rownames(res_geno[["CE"]])[w_ce],
"KO" = rownames(res_geno[["KO"]])[w_ko],
"PTC" = rownames(res_geno[["PTC"]])[w_ptc]))
g_up = UpSet(m)
print(g_up)
df1 = data.frame(padj_CE = res_geno[["CE"]]$padj,
padj_KO = res_geno[["KO"]]$padj,
padj_PTC = res_geno[["PTC"]]$padj)
cr1 = cor(df1$padj_PTC, df1$padj_CE, method = "spearman", use="pair")
cr2 = cor(df1$padj_PTC, df1$padj_KO, method = "spearman", use="pair")
g4 = ggplot(data = df1, mapping = aes(x = -log10(padj_PTC),
y = -log10(padj_CE))) +
geom_pointdensity() + ggtitle(sprintf("%s, %s\nSpearman r = %.2f", d_group, geno, cr1)) +
scale_color_viridis()
g5 = ggplot(data = df1, mapping = aes(x = -log10(padj_PTC),
y = -log10(padj_KO))) +
geom_pointdensity() + ggtitle(sprintf("%s, %s\nSpearman r = %.2f", d_group, geno, cr2)) +
scale_color_viridis()
print(g4)
print(g5)
}
dim(res_geno_df)
res_geno_df[1:2,]
dim(res_geno_reduced_df)
res_geno_reduced_df[1:2,]
res_df = data.frame(gene_ID=rownames(res_geno_df),
res_geno_df)
dim(res_df)
res_df[1:2,]
res_reduced_gene_anno = gene_anno[match(rownames(res_geno_reduced_df), gene_anno$ensembl_gene_id), ]
stopifnot(all(rownames(res_geno_reduced_df)==res_reduced_gene_anno$ensembl_gene_id))
# set gene hgnc_symbol that equal "" to NA
hgnc_symbol_vec = res_reduced_gene_anno$hgnc_symbol
hgnc_symbol_vec[which(hgnc_symbol_vec=="")] = NA
res_reduced_df = data.frame(gene_ID=rownames(res_geno_reduced_df),
hgnc_symbol=hgnc_symbol_vec,
chr=res_reduced_gene_anno$chr,
strand=res_reduced_gene_anno$strand,
start=res_reduced_gene_anno$start,
end=res_reduced_gene_anno$end,
res_geno_reduced_df)
print("hgnc_symbol empty string and NA tables:")
print(table(res_reduced_df$hgnc_symbol=="", useNA="ifany"))
print(table(is.na(res_reduced_df$hgnc_symbol)))
dim(res_reduced_df)
res_reduced_df[1:2,]
fwrite(res_df,
sprintf("%s/2024_05_JAX_RNAseq2_ExtraEmbryonic_%s_%s_DEseq2_raw.tsv",
raw_output_dir, d_group, geno),
sep="\t")
fwrite(res_reduced_df,
sprintf("%s/2024_05_JAX_RNAseq2_ExtraEmbryonic_%s_%s_DEseq2.tsv",
processed_output_dir, d_group, geno),
sep="\t")
}
}
}## [1] "ExM"
##
## MEF2C_CE MEF2C_KO MEF2C_PTC MEF2C_WT1 MEF2C_WT2 MEF2C_WT3 MEIS1_CE
## CE 3 0 0 0 0 0 3
## KO 0 3 0 0 0 0 0
## PTC 0 0 3 0 0 0 0
## WT 0 0 0 1 1 1 0
##
## MEIS1_KO MEIS1_PTC MEIS1_WT MEIS2_CE MEIS2_KO MEIS2_PTC MEIS2_WT
## CE 0 0 0 3 0 0 0
## KO 3 0 0 0 1 0 0
## PTC 0 3 0 0 0 3 0
## WT 0 0 3 0 0 0 3
##
## TRUE
## 34
##
## MEF2C MEIS1 MEIS2 WT
## CE 3 3 3 0
## KO 3 3 1 0
## PTC 3 3 3 0
## WT 0 0 0 9
##
## MEF2C_CE MEF2C_KO MEF2C_PTC MEIS1_CE MEIS1_KO MEIS1_PTC MEIS2_CE MEIS2_KO
## 3 3 3 3 3 3 3 1
## MEIS2_PTC WT_WT
## 3 9
## [1] 10
##
## MEF2C_CE MEF2C_KO MEF2C_PTC MEIS1_CE MEIS1_KO
## 20231004_23-robson-011 0 0 0 3 3
## 20231124_23-robson-013 3 3 3 0 0
##
## MEIS1_PTC MEIS2_CE MEIS2_KO MEIS2_PTC WT_WT
## 20231004_23-robson-011 3 3 1 3 6
## 20231124_23-robson-013 0 0 0 0 3
## [1] "-----------------------------------"
## [1] "DE analysis group: ExM_20231004_23-robson-011"
## [1] "DE analysis group ExM_20231004_23-robson-011 DESeq2 results:"
## [1] "-----------------------------------"
## [1] "After filtering by gene expression q75, the number of genes reduces from 25641 to 23897"
## [1] "After filtering by nonzero expression in at least 4 samples, the number of genes does not change."
## Time difference of 40.93954 secs
## [1] "prop of non-null for rd: 2.32e-01"
## [1] "DE group ExM_20231004_23-robson-011 under KO gene MEIS2:"
## [1] "There is no result for knock strategy KO due to number of knockout samples being only 1."
## [1] "number of DE genes from knock out strategy CE: 44"
## [1] "number of DE genes from knock out strategy PTC: 176"
## [1] "number of common DE genes by knock out strategies PTC and CE: 40"
## [1] "hgnc_symbol empty string and NA tables:"
##
## FALSE <NA>
## 18953 4944
##
## FALSE TRUE
## 18953 4944
## [1] "DE group ExM_20231004_23-robson-011 under KO gene MEIS1:"
## [1] "number of DE genes from knock out strategy CE: 61"
## [1] "number of DE genes from knock out strategy KO: 76"
## [1] "number of DE genes from knock out strategy PTC: 50"
## [1] "number of common DE genes by knock out strategies CE and KO: 31"
## [1] "number of common DE genes by knock out strategies KO and PTC: 30"
## [1] "number of common DE genes by knock out strategies PTC and CE: 29"
## [1] "hgnc_symbol empty string and NA tables:"
##
## FALSE <NA>
## 18953 4944
##
## FALSE TRUE
## 18953 4944
## [1] "-----------------------------------"
## [1] "DE analysis group: ExM_20231124_23-robson-013"
## [1] "DE analysis group ExM_20231124_23-robson-013 DESeq2 results:"
## [1] "-----------------------------------"
## [1] "After filtering by gene expression q75, the number of genes reduces from 25641 to 24649"
## [1] "After filtering by nonzero expression in at least 4 samples, the number of genes reduces from 24649 to 24060"
## Time difference of 12.74257 secs
## [1] "prop of non-null for rd: 0.00e+00"
## [1] "rerun DESeq2 without read depth"
## Time difference of 9.78231 secs
## [1] "DE group ExM_20231124_23-robson-013 under KO gene MEF2C:"
## [1] "number of DE genes from knock out strategy CE: 0"
## [1] "number of DE genes from knock out strategy KO: 1"
## [1] "number of DE genes from knock out strategy PTC: 0"
## [1] "number of common DE genes by knock out strategies CE and KO: 0"
## [1] "number of common DE genes by knock out strategies KO and PTC: 0"
## [1] "number of common DE genes by knock out strategies PTC and CE: 0"
## [1] "hgnc_symbol empty string and NA tables:"
##
## FALSE <NA>
## 18994 5066
##
## FALSE TRUE
## 18994 5066
## [1] "PrS"
##
## BMLHE40_CE BMLHE40_KO BMLHE40_PTC BMLHE40_WT MDX1_CE MDX1_KO MDX1_KOLF2
## CE 3 0 0 0 3 0 0
## KO 0 3 0 0 0 3 0
## PTC 0 0 3 0 0 0 0
## WT 0 0 0 3 0 0 3
##
## MDX1_PTC NC0A3_CE NC0A3_KO NC0A3_PTC NC0A3_WT RUNX1_CE RUNX1_KO RUNX1_PTC
## CE 0 3 0 0 0 3 0 0
## KO 0 0 3 0 0 0 3 0
## PTC 3 0 0 3 0 0 0 3
## WT 0 0 0 0 3 0 0 0
##
## RUNX1_WT1 RUNX1_WT2 RUNX1_WT3
## CE 0 0 0
## KO 0 0 0
## PTC 0 0 0
## WT 1 1 1
##
## TRUE
## 48
##
## BHLHE40 MXD1 NCOA3 RUNX1 WT
## CE 3 3 3 3 0
## KO 3 3 3 3 0
## PTC 3 3 3 3 0
## WT 0 0 0 0 12
##
## BHLHE40_CE BHLHE40_KO BHLHE40_PTC MXD1_CE MXD1_KO MXD1_PTC
## 3 3 3 3 3 3
## NCOA3_CE NCOA3_KO NCOA3_PTC RUNX1_CE RUNX1_KO RUNX1_PTC
## 3 3 3 3 3 3
## WT_WT
## 12
## [1] 13
##
## BHLHE40_CE BHLHE40_KO BHLHE40_PTC MXD1_CE MXD1_KO
## 20230809_23-robson-007-run2 0 0 0 3 3
## 20231124_23-robson-013 0 0 0 0 0
## 20240307_24-robson-002 3 3 3 0 0
## 20240307_24-robson-005 0 0 0 0 0
##
## MXD1_PTC NCOA3_CE NCOA3_KO NCOA3_PTC RUNX1_CE
## 20230809_23-robson-007-run2 3 0 0 0 0
## 20231124_23-robson-013 0 0 0 0 3
## 20240307_24-robson-002 0 0 0 0 0
## 20240307_24-robson-005 0 3 3 3 0
##
## RUNX1_KO RUNX1_PTC WT_WT
## 20230809_23-robson-007-run2 0 0 3
## 20231124_23-robson-013 3 3 3
## 20240307_24-robson-002 0 0 3
## 20240307_24-robson-005 0 0 3
## [1] "-----------------------------------"
## [1] "DE analysis group: PrS"
## [1] "DE analysis group PrS DESeq2 results:"
## [1] "-----------------------------------"
## [1] "After filtering by gene expression q75, the number of genes reduces from 25641 to 23756"
## [1] "After filtering by nonzero expression in at least 4 samples, the number of genes does not change."
## Time difference of 47.80283 secs
## [1] "prop of non-null for rd: 5.05e-01"
## [1] "DE group PrS under KO gene NCOA3:"
## [1] "number of DE genes from knock out strategy CE: 29"
## [1] "number of DE genes from knock out strategy KO: 107"
## [1] "number of DE genes from knock out strategy PTC: 1"
## [1] "number of common DE genes by knock out strategies CE and KO: 17"
## [1] "number of common DE genes by knock out strategies KO and PTC: 1"
## [1] "number of common DE genes by knock out strategies PTC and CE: 1"
## [1] "hgnc_symbol empty string and NA tables:"
##
## FALSE <NA>
## 18951 4805
##
## FALSE TRUE
## 18951 4805
## [1] "DE group PrS under KO gene BHLHE40:"
## [1] "number of DE genes from knock out strategy CE: 429"
## [1] "number of DE genes from knock out strategy KO: 412"
## [1] "number of DE genes from knock out strategy PTC: 739"
## [1] "number of common DE genes by knock out strategies CE and KO: 298"
## [1] "number of common DE genes by knock out strategies KO and PTC: 337"
## [1] "number of common DE genes by knock out strategies PTC and CE: 320"
## [1] "hgnc_symbol empty string and NA tables:"
##
## FALSE <NA>
## 18951 4805
##
## FALSE TRUE
## 18951 4805
## [1] "DE group PrS under KO gene MXD1:"
## [1] "number of DE genes from knock out strategy CE: 120"
## [1] "number of DE genes from knock out strategy KO: 344"
## [1] "number of DE genes from knock out strategy PTC: 90"
## [1] "number of common DE genes by knock out strategies CE and KO: 37"
## [1] "number of common DE genes by knock out strategies KO and PTC: 39"
## [1] "number of common DE genes by knock out strategies PTC and CE: 37"
## [1] "hgnc_symbol empty string and NA tables:"
##
## FALSE <NA>
## 18951 4805
##
## FALSE TRUE
## 18951 4805
## [1] "DE group PrS under KO gene RUNX1:"
## [1] "number of DE genes from knock out strategy CE: 1941"
## [1] "number of DE genes from knock out strategy KO: 351"
## [1] "number of DE genes from knock out strategy PTC: 2015"
## [1] "number of common DE genes by knock out strategies CE and KO: 173"
## [1] "number of common DE genes by knock out strategies KO and PTC: 188"
## [1] "number of common DE genes by knock out strategies PTC and CE: 1098"
## [1] "hgnc_symbol empty string and NA tables:"
##
## FALSE <NA>
## 18951 4805
##
## FALSE TRUE
## 18951 4805
colnames(df_size) = c("DE_group", "knockout_gene_strategy", "n_ko", "n_WT")
fwrite(df_size,
sprintf("%s/2024_05_JAX_RNAseq2_ExtraEmbryonic_DE_n_samples.tsv",
output_dir),
sep="\t")gc()## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 8135679 434.5 12621529 674.1 NA 12621529 674.1
## Vcells 35028473 267.3 79903722 609.7 65536 79903722 609.7
sessionInfo()## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] readxl_1.4.3 UpSetR_1.4.0
## [3] ComplexHeatmap_2.14.0 data.table_1.15.4
## [5] dplyr_1.1.4 ggpointdensity_0.1.0
## [7] viridis_0.6.5 viridisLite_0.4.2
## [9] DESeq2_1.38.3 SummarizedExperiment_1.28.0
## [11] Biobase_2.58.0 MatrixGenerics_1.10.0
## [13] matrixStats_1.3.0 GenomicRanges_1.50.2
## [15] GenomeInfoDb_1.34.9 IRanges_2.32.0
## [17] S4Vectors_0.36.2 BiocGenerics_0.44.0
## [19] RColorBrewer_1.1-3 MASS_7.3-60
## [21] stringr_1.5.1 ggpubr_0.6.0
## [23] ggrepel_0.9.5 ggplot2_3.5.1
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-8 bit64_4.0.5 doParallel_1.0.17
## [4] httr_1.4.7 tools_4.2.3 backports_1.5.0
## [7] bslib_0.8.0 utf8_1.2.4 R6_2.5.1
## [10] DBI_1.2.3 colorspace_2.1-1 GetoptLong_1.0.5
## [13] withr_3.0.1 tidyselect_1.2.1 gridExtra_2.3
## [16] bit_4.0.5 compiler_4.2.3 cli_3.6.3
## [19] Cairo_1.6-2 DelayedArray_0.24.0 labeling_0.4.3
## [22] sass_0.4.9 scales_1.3.0 digest_0.6.37
## [25] rmarkdown_2.28 XVector_0.38.0 pkgconfig_2.0.3
## [28] htmltools_0.5.8.1 highr_0.11 fastmap_1.2.0
## [31] GlobalOptions_0.1.2 rlang_1.1.4 rstudioapi_0.16.0
## [34] RSQLite_2.3.7 farver_2.1.2 shape_1.4.6.1
## [37] jquerylib_0.1.4 generics_0.1.3 jsonlite_1.8.8
## [40] BiocParallel_1.32.6 car_3.1-2 RCurl_1.98-1.16
## [43] magrittr_2.0.3 GenomeInfoDbData_1.2.9 Matrix_1.5-4.1
## [46] Rcpp_1.0.13 munsell_0.5.1 fansi_1.0.6
## [49] abind_1.4-5 lifecycle_1.0.4 stringi_1.8.4
## [52] yaml_2.3.10 carData_3.0-5 zlibbioc_1.44.0
## [55] plyr_1.8.9 blob_1.2.4 parallel_4.2.3
## [58] crayon_1.5.3 lattice_0.22-6 Biostrings_2.66.0
## [61] annotate_1.76.0 circlize_0.4.16 KEGGREST_1.38.0
## [64] locfit_1.5-9.10 knitr_1.48 pillar_1.9.0
## [67] rjson_0.2.21 ggsignif_0.6.4 geneplotter_1.76.0
## [70] codetools_0.2-20 XML_3.99-0.17 glue_1.7.0
## [73] evaluate_0.24.0 foreach_1.5.2 vctrs_0.6.5
## [76] png_0.1-8 cellranger_1.1.0 gtable_0.3.5
## [79] purrr_1.0.2 tidyr_1.3.1 clue_0.3-65
## [82] cachem_1.1.0 xfun_0.47 xtable_1.8-4
## [85] broom_1.0.6 rstatix_0.7.2 tibble_3.2.1
## [88] iterators_1.0.14 AnnotationDbi_1.60.2 memoise_2.0.1
## [91] cluster_2.1.6